numbers = seq(1:6)
sepPlotTrain=list()
AIC = list()
BIC = list()
for(ii in 1:k){
  
  train = indiadata[indiadata$rand !=ii,]  
  test=indiadata[indiadata$rand==ii,] 
  modelnoCL = glm(logitform, data=train, family=binomial(link="logit"))
  
  
  models[[ii]]= cl(train, modelnoCL, train$state)
  model=cl(train, modelnoCL, train$state)
  
  #coefplot(model[,1], model[,1], intercept = TRUE, xlim=c(-35,35), col.pts = colors[ii], main = "Coefficient Estimates, Colored by Fold", add=TRUE, offset=spacing[ii])
  
  #generating predicted values
  trainBeta = coef(modelnoCL)
  X = data.matrix( cbind(1, train[, names(trainBeta)[2:length(trainBeta)] ] ) )
  predVals = X %*% trainBeta
  predProbs = 1/( 1 + exp(-predVals) )
  
  
  # Running some perf statistics (logLikelihood, AIC, BIC)
  #logLike = sum(test$statehood * log(predProbs) + (1 - test$statehood)*log(1-predProbs))
  
  # Using this we can calculate the AIC and BIC
  # AIC: -2 * log-likelihood + 2 * npar
  #AIC[[ii]] = -2 * logLike + 2 * length(coef(modelnoCL)) # AIC(m2)
  # BIC: -2 * log-likelihood + log(nrow(data)) * npar
  #BIC[[ii]] = -2 * logLike + log(nrow(test)) * length(coef(modelnoCL))
  
  sepDataTrain = data.frame(prob = predProbs, dv = train$statehood)
  sepDataTrain = sepDataTrain[order(sepDataTrain$prob), ]
  col = c("grey94", "midnightblue")
  
  sepPlotTrain[[ii]] = ggplot(data=sepDataTrain, aes(ymin=0, ymax=1, xmin=0, xmax=1:length(dv)))
  # Create a rectangular plot to fill
  sepPlotTrain[[ii]] = sepPlotTrain[[ii]] + geom_rect(fill = "grey94")
  # Add predicted probability line
  sepPlotTrain[[ii]] = sepPlotTrain[[ii]] + geom_line(aes(y=prob, x=1:length(dv)), lwd=.8)
  # Add lines to denote events
  sepPlotTrain[[ii]] = sepPlotTrain[[ii]] + geom_linerange(aes(color=factor(dv), x=1:length(dv)), alpha=1)
  # Color event lines
  sepPlotTrain[[ii]] = sepPlotTrain[[ii]] + scale_color_manual(values = col)
  sepPlotTrain[[ii]] = sepPlotTrain[[ii]] + scale_y_continuous("", breaks =NULL)
  sepPlotTrain[[ii]] = sepPlotTrain[[ii]] + scale_x_continuous("", breaks = NULL)
  sepPlotTrain[[ii]] = sepPlotTrain[[ii]] + theme(
    axis.ticks = element_blank(),
    legend.position = "none", 
    panel.background = element_blank(), 
    panel.grid = element_blank() )
  title =  paste0("Separation Plot for Training Set, Fold ", numbers[ii])
  sepPlotTrain[[ii]] = sepPlotTrain[[ii]] + ggtitle(title)
  sepPlotTrain[[ii]]
}

#combined plot
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  require(grid)
  
  plots <- c(list(...), plotlist)
  
  numPlots = length(plots)
  
  if (is.null(layout)) {
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }
  
  if (numPlots == 1) {
    print(plots[[1]])
    
  } else {
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    
    for (i in 1:numPlots) {
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}


combinedSepPlots = multiplot(sepPlot[[1]], sepPlot[[2]], sepPlot[[3]], sepPlot[[4]], sepPlot[[5]], sepPlot[[6]], cols=2)
combinedSepTrain = multiplot(sepPlotTrain[[1]], sepPlotTrain[[2]], sepPlotTrain[[3]], sepPlotTrain[[4]], sepPlotTrain[[5]], sepPlotTrain[[6]], cols=2)


######### interaction effects visualization #############
#######predicting effect representation on statehood
means=apply(indiadata[,ivs], 2, function(x){ mean(x, na.rm=TRUE) })
minMaxSep=quantile(indiadata[,ivs[3]], probs=c(0,1), na.rm=TRUE)

scens=rbind(c(1, means[1:2], minMaxSep[1], means[4:length(means)]),
            c(1, means[1:2], minMaxSep[2], means[4:length(means)]) )

# Simulate additional parameter estimates from multivariate normal
sims=1000
draws = mvrnorm(sims, coef(mlogit1), vcov(mlogit1))

# Get predicted values using matrix multiplication
preds = draws %*% t(scens)

#plotting sim results
plot(density(preds[,1]), col = "red", bty = "n", 
     las = 1, xlim=c(-10, 10), lwd = 3, main='', ylim=c(0,1),
     xlab = "Statehood")
lines(density(preds[,2]), col = "blue", lwd = 3)
title("Figure 1: Relative Representation Effects on Statehood")
legend("topleft", title="Marginal Effects of Congressional Representation", c("Low Rep", "High Rep"),
       lty=c(1,1,1,1), col=c("red","blue"), cex=.8) 

### plotting india graph ###
library(raster)
library(rgdal)
library(rgeos)
library(ggplot2)
library(dplyr)
library(nlme)

### Q1: Assam only

### Get data
india <- getData("GADM", country = "India", level = 1)

#sep plot for baseline model
#generating predicted values. use mlogit1
trainBetaAll = coef(mlogit1)
Xall = data.matrix( cbind(1, indiadata[, names(trainBetaAll)[2:length(trainBetaAll)] ] ) )
predValsAll = Xall %*% trainBetaAll
predProbsAll = 1/( 1 + exp(-predValsAll) )


#setting up to plot

sepDataAll = data.frame(prob = predProbsAll, dv = indiadata$statehood)
sepDataAll = sepDataAll[order(sepDataAll$prob), ]
col = c("grey94", 
       "midnightblue") 

#graphing
sepPlotAll = ggplot(data=sepDataAll, aes(ymin=0, ymax=1, xmin=0, xmax=1:length(dv)))
# Create a rectangular plot to fill
sepPlotAll = sepPlotAll + geom_rect(fill = "grey94")
# Add predicted probability line
sepPlotAll = sepPlotAll + geom_line(aes(y=prob, x=1:length(dv)), lwd=.9)
# Add lines to denote events
sepPlotAll = sepPlotAll + geom_linerange(aes(color=factor(dv), x=1:length(dv)), alpha=.9)
# Color event lines
sepPlotAll = sepPlotAll + scale_color_manual(values = col)
sepPlotAll= sepPlotAll + scale_y_continuous("", breaks = NULL)
sepPlotAll = sepPlotAll + scale_x_continuous("", breaks = NULL)
sepPlotAll = sepPlotAll + theme(
  axis.ticks = element_blank(),
  legend.position = "none", 
  panel.background = element_blank(), 
  panel.grid = element_blank() )

sepPlotAll = sepPlotAll + ggtitle("Separation Plot for Baseline Model")
sepPlotAll


##########k fold for no interaction term
colors = c("pink","red","orange","yellow","green","dark green","light blue","blue","purple")
spacing = c(.1, .2, .3, .4, .5, .6)
names = c("Intercept", "Violence",
          "Rel. INC rep", "Dem. polarization", 
          "Absolute level INC rep", "Enclave pop.", "Hindu pop.", 
          "Dist. to Capital")
coefplot(logitmodNIcl[,1], logitmodNIcl[,1],  varnames=names, xlim=c(-65,45),
         main = "Coefficient Estimates, No Interaction")
numbers = seq(1:6)
sepPlotNI=list()
AICni = list()
BICni = list()
for(ii in 1:k){
  
  train = indiadata[indiadata$rand !=ii,]  
  test=indiadata[indiadata$rand==ii,] 
  modelnoI = glm(logitformNoInteraction, data=train, family=binomial(link="logit"))
  
  
  models[[ii]]= cl(train, modelnoI, train$state)
  model=cl(train, modelnoI, train$state)
  
  coefplot(model[,1], model[,1], intercept = TRUE, xlim=c(-35,35), col.pts = colors[ii], main = "Coefficient Estimates, Colored by Fold", add=TRUE, offset=spacing[ii])
  
  #generating predicted values
  trainBeta = coef(modelnoI)
  X = data.matrix( cbind(1, train[, names(trainBeta)[2:length(trainBeta)] ] ) )
  predVals = X %*% trainBeta
  predProbs = 1/( 1 + exp(-predVals) )
  
  
  # Running some perf statistics (logLikelihood, AIC, BIC)
  logLike = sum(test$statehood * log(predProbs) + (1 - test$statehood)*log(1-predProbs))
  
  # Using this we can calculate the AIC and BIC
   #AIC -2 * log-likelihood + 2 * npar
  AICni[[ii]] = -2 * logLike + 2 * length(coef(modelnoI)) # AIC(m2)
  # BIC: -2 * log-likelihood + log(nrow(data)) * npar
  BICni[[ii]] = -2 * logLike + log(nrow(test)) * length(coef(modelnoI))
  
  sepDataTrain = data.frame(prob = predProbs, dv = train$statehood)
  sepDataTrain = sepDataTrain[order(sepDataTrain$prob), ]
  col = c("grey94", "midnightblue")
  
  sepPlotNI[[ii]] = ggplot(data=sepDataTrain, aes(ymin=0, ymax=1, xmin=0, xmax=1:length(dv)))
  # Create a rectangular plot to fill
  sepPlotNI[[ii]] = sepPlotNI[[ii]] + geom_rect(fill = "grey94")
  # Add predicted probability line
  sepPlotNI[[ii]] = sepPlotNI[[ii]] + geom_line(aes(y=prob, x=1:length(dv)), lwd=.8)
  # Add lines to denote events
  sepPlotNI[[ii]] = sepPlotNI[[ii]] + geom_linerange(aes(color=factor(dv), x=1:length(dv)), alpha=1)
  # Color event lines
  sepPlotNI[[ii]] = sepPlotNI[[ii]] + scale_color_manual(values = col)
  sepPlotNI[[ii]] = sepPlotNI[[ii]] + scale_y_continuous("", breaks = c(0, 0.25, 0.5, 0.75, 1.0))
  sepPlotNI[[ii]] = sepPlotNI[[ii]] + scale_x_continuous("", breaks = NULL)
  sepPlotNI[[ii]] = sepPlotNI[[ii]] + theme(
    axis.ticks = element_blank(),
    legend.position = "none", 
    panel.background = element_blank(), 
    panel.grid = element_blank() )
  title =  paste0("Separation Plot for No Interaction, Fold ", numbers[ii])
  sepPlotNI[[ii]] = sepPlotNI[[ii]] + ggtitle(title)
  sepPlotNI[[ii]]
}
combinedSepPlotsNI = multiplot(sepPlotNI[[1]], sepPlotNI[[2]], sepPlotNI[[3]], sepPlotNI[[4]], sepPlotNI[[5]], sepPlotNI[[6]], cols=2)


#no interaction, test set
modelsNI = list(list())
colors = c("pink","red","orange","yellow","green","dark green","light blue","blue","purple")
spacing = c(.1, .2, .3, .4, .5, .6)
names = c("Intercept", "Viol*Rel INC Rep", "Violence",
          "Rel. INC rep", "Dem. polarization", 
          "Absolute level INC rep", "Enclave pop.", "Hindu pop.", 
          "Dist. to Capital")
numbers = seq(1:6)
#coefplot(logitmodNIcl[,1], logitmodNIcl[,1],  varnames=names, xlim=c(-65,45),
#         main = "Coefficient Estimates, No Interaction")
AICtestNI = list()
BICtestNI = list()
sepPlotNItest=list()

#originally formula was logitformNoInteraction
for(ii in 1:k){
  
  train = indiadata[indiadata$rand !=ii,]  
  test=indiadata[indiadata$rand==ii,] 
  modelnoCLni = glm(logitformNoInteraction, data=train, family=binomial(link="logit"))
  
  
  modelsNI[[ii]]= cl(train, modelnoCLni, train$state)
  model=cl(train, modelnoCLni, train$state)
  
  #coefplot(model[,1], model[,1], intercept = TRUE, xlim=c(-35,35), col.pts = colors[ii], main = "Coefficient Estimates, Colored by Fold", add=TRUE, offset=spacing[ii])
  
  #generating predicted values
  trainBeta = coef(modelnoCLni)
  X = data.matrix( cbind(1, test[, names(trainBeta)[2:length(trainBeta)] ] ) )
  predVals = X %*% trainBeta
  predProbs = 1/( 1 + exp(-predVals) )
  
  # Running some perf statistics (logLikelihood, AIC, BIC)
  #logLike = sum(test$statehood * log(predProbs) + (1 - test$statehood)*log(1-predProbs))
  
  # Using this we can calculate the AIC and BIC
  # AIC: -2 * log-likelihood + 2 * npar
  #AIC[[ii]] = -2 * logLike + 2 * length(coef(modelnoCLni)) # AIC(m2)
  # BIC: -2 * log-likelihood + log(nrow(data)) * npar
  #BIC[[ii]] = -2 * logLike + log(nrow(test)) * length(coef(modelnoCLni))
  
  #setting up to plot
  
  sepDataNItest = data.frame(prob = predProbs, dv = test$statehood)
  sepDataNItest = sepDataNItest[order(sepDataNItest$prob), ]
  col = c("grey94", "midnightblue") 
  
  #graphing
  sepPlotNItest[[ii]] = ggplot(data=sepDataNItest, aes(ymin=0, ymax=1, xmin=0, xmax=1:length(dv)))
  # Create a rectangular plot to fill
  sepPlotNItest[[ii]] = sepPlotNItest[[ii]] + geom_rect(fill = "grey94")
  # Add predicted probability line
  sepPlotNItest[[ii]] = sepPlotNItest[[ii]] + geom_line(aes(y=prob, x=1:length(dv)), lwd=.8)
  # Add lines to denote events
  sepPlotNItest[[ii]] = sepPlotNItest[[ii]] + geom_linerange(aes(color=factor(dv), x=1:length(dv)), alpha=.8)
  # Color event lines
  sepPlotNItest[[ii]] = sepPlotNItest[[ii]] + scale_color_manual(values = col)
  sepPlotNItest[[ii]] = sepPlotNItest[[ii]] + scale_y_continuous("", breaks = c(0, 0.25, 0.5, 0.75, 1.0))
  sepPlotNItest[[ii]] = sepPlotNItest[[ii]] + scale_x_continuous("", breaks = NULL)
  sepPlotNItest[[ii]] = sepPlotNItest[[ii]] + theme(
    axis.ticks = element_blank(),
    legend.position = "none", 
    panel.background = element_blank(), 
    panel.grid = element_blank() )
  title =  paste0("Test Set, No Interaction: Fold ", numbers[ii])
  sepPlotNItest[[ii]] = sepPlotNItest[[ii]] + ggtitle(title)
  sepPlotNItest[[ii]]
}

combinedSepPlotsNItest = multiplot(sepPlotNItest[[1]], sepPlotNItest[[2]], sepPlotNItest[[3]], sepPlotNItest[[4]], sepPlotNItest[[5]], sepPlotNItest[[6]], cols=2)



###############################333
#testing how much other variables matter

ivsNV = c('LNRelINCRep', 'DemPol', 
        'lnenclINCrep', 'LNenclpop', 'Hindu', 'lnkmCAP')
logitformNV = formula(paste0( dv, ' ~ ', paste(ivsNV, collapse=' + ') ))

mlogitNV = glm(logitformNV, data=indiadata, family=binomial(link="logit"))
summary(mlogitNV)
lrtest(mlogit1, mlogitNV)

ivsNone = c('DemPol', 
          'lnenclINCrep', 'LNenclpop', 'Hindu', 'lnkmCAP')
logitformnone = formula(paste0( dv, ' ~ ', paste(ivsNone, collapse=' + ') ))
mlogitNone = glm(logitformnone, data=indiadata, family=binomial(link="logit"))
summary(mlogitNone)
lrtest(mlogit1, mlogitNone)

mlogitsingle = glm(statehood ~ lnkmCAP, data=indiadata, family =binomial(link="logit")) #aic 72
lrtest(mlogit1, mlogitsingle) #lrtest only -34


###generatign bar plot with this information
AICscores = c(67.795, 67.839, 66.723, 65.223)
LnLikeScores = c(-25, -25.9, -26.3, -26.6)
modelNum= c(1, 2, 3, 4)

bardata = t(as.matrix(cbind(AICscores, LnLikeScores)))
names(bardata) = c("AIC", "LnLikelihood", "Model")
barplot = barplot(bardata, beside=TRUE, main="Alternative Model Performance Statistics", 
                  col=c("blue4", "lightskyblue2"), ylim=c(-45, 85), xlab="labels",
                  names.arg=c("1", "2", "3", "4"))
col=c("blue4", "lightskyblue2")
